Introduction

This R Markdown file will do the following:

  1. Download the latest SDG indicator data from UN Stats (https://unstats.un.org/sdgs/indicators/en/#) using their API

  2. Transform the data so that for each indicator we can create a score documenting whether a value exists for the country in a year, whether the value is based on country data, country data adjusted, estimated, or modelled data according the UN Stats metadata. This will only include tier 1 indicators.

  3. Combine the resulting data into a single file for use in the Statistical Performance Indicators dashboard and index

UN Stats Database

Below is a paraphrased description from the UN stats webpage (https://unstats.un.org/sdgs/indicators/indicators-list/):

The global indicator framework for Sustainable Development Goals was developed by the Inter-Agency and Expert Group on SDG Indicators (IAEG-SDGs) and agreed upon at the 48th session of the United Nations Statistical Commission held in March 2017.

The global indicator framework includes 231 unique indicators. Please note that the total number of indicators listed in the global indicator framework of SDG indicators is 247. However, twelve indicators repeat under two or three different targets.

For each value of the indicator, the responsible international agency has been requested to indicate whether the national data were adjusted, estimated, modelled or are the result of global monitoring. The “nature” of the data in the SDG database is determined as follows:

Scoring

For each indicator, we will produce a value for each country with the following coding scheme:

API

Now we will pull data from the UN Stats API (https://unstats.un.org/SDGAPI/swagger/). We will keep only the tier 1 indicators

#get a list of all SDG indicators
indicators_url <- 'https://unstats.un.org/SDGAPI/v1/sdg/Indicator/List?pageSize=10000'

#bring in the list of indicators
list_df <- jsonlite::fromJSON(indicators_url, flatten = TRUE) %>%
  as_tibble() %>%
  unnest(keep_empty = TRUE) %>%
  filter(tier==1)  # keep just the tier 1 indicators %>%
  # group_by(code) %>%
  # filter(row_number()==1)
#remove this dataframe if it exists
if (exists('un_sdg_df')) {
  rm(un_sdg_df)
}

for (series in list_df$code1) {
    
  #read in files
  if (file.exists(paste(raw_dir,'/sdg_data/',series,'.csv',sep=""))) {
    temp <- read_csv(paste(raw_dir,'/sdg_data/',series,'.csv',sep="")) %>%
      mutate(
        date=as.numeric(date), 
        goal=as.character(goal), 
        target=as.character(target), 
        code==as.character(code), 
        code1=as.character(code1), 
        description1=as.character(description1)
      )
    
    #   #now append it to the overall database
    if (!exists('un_sdg_df')) {
      un_sdg_df <- temp
    } else if (exists('un_sdg_df')) {
      un_sdg_df <- un_sdg_df %>%
        bind_rows(temp)
    }
  }
  
}


#save the data file
  
write_excel_csv(un_sdg_df, paste(output_dir, '/un_sdg_df.csv',sep=""))

Summary Statistics by SDG Goal

Below we will produce maps and statistics by region/income group of the SPI Indicators based on the UN SDG database. This will be done according to SDG goal (1-17)

un_sdg_df <- read_csv(paste(output_dir,'/un_sdg_df.csv',sep=""))

spi_mapper('un_sdg_df', '1','Goal 1')

spi_mapper('un_sdg_df', '2','Goal 2')

spi_mapper('un_sdg_df', '3','Goal 3')

spi_mapper('un_sdg_df', '4','Goal 4')

spi_mapper('un_sdg_df', '5','Goal 5')

spi_mapper('un_sdg_df', '6','Goal 6')

spi_mapper('un_sdg_df', '7','Goal 7')

spi_mapper('un_sdg_df', '8','Goal 8')

spi_mapper('un_sdg_df', '9','Goal 9')

spi_mapper('un_sdg_df', '10','Goal 10')

spi_mapper('un_sdg_df', '11','Goal 11')

spi_mapper('un_sdg_df', '12','Goal 12')

spi_mapper('un_sdg_df', '13','Goal 13')

spi_mapper('un_sdg_df', '14','Goal 14')

spi_mapper('un_sdg_df', '15','Goal 15')

spi_mapper('un_sdg_df', '16','Goal 16')

spi_mapper('un_sdg_df', '17','Goal 17')

# stats_fun <- 

Summary Statistics by SDG Group (Social, Economic, Environmental, Institutional)

Below we will produce maps and statistics by region/income group of the SPI Indicators based on the UN SDG database. This will be done by the following classification:

un_sdg_df_grp <- un_sdg_df %>%
  mutate(goal=case_when(
    (goal == '1'| goal == '2' | goal == '3' | goal == '4' | goal == '5' | goal == '6') ~ 'social',
    (goal == '7' | goal == '8' | goal == '9' | goal == '10' | goal == '11' | goal == '12') ~ 'economic',
    (goal == '13' | goal == '14' | goal == '15') ~ 'environmental',
    (goal == '16' | goal == '17') ~ 'institutinoal'
  ))

spi_mapper('un_sdg_df_grp', 'social','Economic Goals (SDG 1-6)')

spi_mapper('un_sdg_df_grp', 'economic','Economic Statistics (SDG goal 7-12)')

spi_mapper('un_sdg_df_grp', 'environmental','Environmental Staistics (SDG goal 13-15)')

spi_mapper('un_sdg_df_grp', 'institutinoal','Institutional Statistics (SDG goal 16-17)')

stats_df <- un_sdg_df %>%
  filter(between(date,2015,2019) ) %>%
    group_by( iso3c, goal,target) %>%
    summarise(ind_quality=100*max(ind_quality), #get a measure by country of whether indicator exists at all over 5 years
              ind_available=100*max(!is.na(ind_value))
    ) %>%
    group_by(iso3c,goal) %>%
    summarise(ind_quality=round(mean(ind_quality, na.rm=T),1), #get an average of this across countries
            ind_available=round(mean(ind_available, na.rm=T),1)
            ) %>%
  left_join(country_metadata) %>%
  filter(!is.na(region)) %>%
  mutate(goal=paste('SDG Goal', goal, sep=" ")) %>%
  select(iso3c, country,region, income, goal,ind_available, ind_quality)


stats_df_wide <- stats_df %>%
  select(iso3c, country,region, income, goal,ind_quality) %>%
  pivot_wider(names_from = 'goal',
              values_from='ind_quality')



#overall sumstats
  stats_sum <- un_sdg_df %>%
    filter(between(date,2015,2019) ) %>%
    group_by( iso3c, goal,target) %>%
    summarise(ind_quality=100*max(ind_quality), #get a measure by country of whether indicator exists at all over 5 years
              ind_available=100*max(!is.na(ind_value))
    ) %>%
    group_by(goal) %>%
    summarise(ind_quality=round(mean(ind_quality, na.rm=T),1), #get an average of this across countries
            ind_available=round(mean(ind_available, na.rm=T),1)
            ) %>%
    arrange(as.numeric(goal)) %>%
    mutate(goal=paste('SDG Goal', goal, sep=" ")) %>%
    select(goal,  ind_available, ind_quality)
  
          DT::datatable(stats_sum, caption="Table of Overall Scores by Goal in 2019",
                      rownames=FALSE,
                      colnames = c("SDG Goal",'Percent Indicators Available over 5 Year Period (2015-19)', "5 Year Average of SPI Score (Availability + Quality) Measure (Scale 0-100)"),
                      class='cell-border stripe',
                      escape = FALSE,
                      extensions = c ('Buttons', 'FixedHeader'), options=list(
                          dom = 'Bfrtip',
                          buttons = c('copy', 'csv', 'excel'),
                          pageLength = 60,
                          scrollX = TRUE,
                          paging=TRUE,
                          ordering=F))
        DT::datatable(stats_df, caption="Table of Scores by Country in 2019",
                      rownames=FALSE,
                      colnames = c("ISO3c", "Country Name","Region", "Income", "SDG Goal",'Percent Indicators Available over 5 Year Period (2015-19)', "5 Year Average of SPI Score (Availability + Quality) Measure (Scale 0-100)"),
                      class='cell-border stripe',
                      escape = FALSE,
                      extensions = c ('Buttons', 'FixedHeader'), options=list(
                          dom = 'Bfrtip',
                          buttons = c('copy', 'csv', 'excel'),
                          pageLength = 60,
                          scrollX = TRUE,
                          paging=TRUE,
                          ordering=F))
#function to create scored data


un_aki_fun <- function(date_start, date_end) {
  temp <- un_sdg_df %>%
    filter(between(date,date_start,date_end) ) %>%
      mutate(ind_quality=if_else(is.na(ind_quality),0,ind_quality)) %>% # if the indicator is missing for a year in the database, set availability to 0.
      group_by(iso3c,goal) %>%
      summarise(ind_quality=round(mean(ind_quality),3) #get an average of this across countries
              ) %>%
    left_join(country_metadata) %>%
    filter(!is.na(region)) %>%
    select(iso3c, country,region, income, goal, ind_quality) %>%
    mutate(date=date_end)
  
  
  temp %>%
    select(iso3c,date, country,region, income, goal,ind_quality) %>%
    pivot_wider(names_from = 'goal',
                values_from='ind_quality',
                names_glue = "SPI.D3.{goal}")
}


####
# 10 Year moving average
####
#create this database for each year from 2004 to 2019 using a 5 year average
for (i in c(2009:2019)) {
  
  start=i-9
  end=i
  
  temp_df <- un_aki_fun(start,end)
  assign(paste('un_aki_',end, sep=""), temp_df)
}

if (exists('un_aki')) {
  rm('un_aki')
}
#now append together and save
for (i in c(2009:2019)) {
  
  temp<-get(paste('un_aki_',i, sep=""))
  
  if (!exists('un_aki')) {
    un_aki<-temp 
  } else {
    un_aki<-un_aki %>%
      bind_rows(temp) %>%
      arrange(-date, iso3c)
  }
}

write_excel_csv(un_aki, path=paste(output_dir, 'SPI_D3_UNSD_data_10yr.csv', sep="/"))

####
# 8 Year moving average
####
#create this database for each year from 2004 to 2019 using a 5 year average
for (i in c(2007:2019)) {
  
  start=i-7
  end=i
  
  temp_df <- un_aki_fun(start,end)
  assign(paste('un_aki_',end, sep=""), temp_df)
}

if (exists('un_aki')) {
  rm('un_aki')
}
#now append together and save
for (i in c(2007:2019)) {
  
  temp<-get(paste('un_aki_',i, sep=""))
  
  if (!exists('un_aki')) {
    un_aki<-temp
  } else {
    un_aki<-un_aki %>%
      bind_rows(temp) %>%
      arrange(-date, iso3c)
  }
}

write_excel_csv(un_aki, path=paste(output_dir, 'SPI_D3_UNSD_data_8yr.csv', sep="/"))

####
# 5 Year moving average
####
#create this database for each year from 2004 to 2019 using a 5 year average
for (i in c(2004:2019)) {
  
  start=i-4
  end=i
  
  temp_df <- un_aki_fun(start,end)
  assign(paste('un_aki_',end, sep=""), temp_df)
}

if (exists('un_aki')) {
  rm('un_aki')
}
#now append together and save
for (i in c(2004:2019)) {
  
  temp<-get(paste('un_aki_',i, sep=""))
  
  if (!exists('un_aki')) {
    un_aki<-temp
  } else {
    un_aki<-un_aki %>%
      bind_rows(temp) %>%
      arrange(-date, iso3c)
  }
}

write_excel_csv(un_aki, path=paste(output_dir, 'SPI_D3_UNSD_data_5yr.csv', sep="/"))
####
# 3 Year moving average
####
#create this database for each year from 2004 to 2019 using a 5 year average
for (i in c(2004:2019)) {
  
  start=i-2
  end=i
  
  temp_df <- un_aki_fun(start,end)
  assign(paste('un_aki_',end, sep=""), temp_df)
}

if (exists('un_aki')) {
  rm('un_aki')
}
#now append together and save
for (i in c(2004:2019)) {
  
  temp<-get(paste('un_aki_',i, sep=""))
  
  if (!exists('un_aki')) {
    un_aki<-temp
  } else {
    un_aki<-un_aki %>%
      bind_rows(temp) %>%
      arrange(-date, iso3c)
  }
}

write_excel_csv(un_aki, path=paste(output_dir, 'SPI_D3_UNSD_data_3yr.csv', sep="/"))

####
# 1 Year moving average
####
#create this database for each year from 2004 to 2019 using a 5 year average
for (i in c(2004:2019)) {
  
  start=i-0
  end=i
  
  temp_df <- un_aki_fun(start,end)
  assign(paste('un_aki_',end, sep=""), temp_df)
}

if (exists('un_aki')) {
  rm('un_aki')
}
#now append together and save
for (i in c(2004:2019)) {
  
  temp<-get(paste('un_aki_',i, sep=""))
  
  if (!exists('un_aki')) {
    un_aki<-temp
  } else {
    un_aki<-un_aki %>%
      bind_rows(temp) %>%
      arrange(-date, iso3c)
  }
}

write_excel_csv(un_aki, path=paste(output_dir, 'SPI_D3_UNSD_data_1yr.csv', sep="/"))

# Comparison with the AKI

Next we will produce correlations between these scores and the scores for countries based on the availability of key indicators metric. The indicators were pulled to represent one from each of the first 10 SDGs, 3 environmental indicators, and 2 indicators representing economic statistcs

Indicator 3.1: social statistics

  • AKI 3.1: Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)
  • AKI 3.2: Food Insecurity Experience Scale
  • AKI 3.3: Mortality rate, under-5 (per 1,000 live births)
  • AKI 3.4: Proportion of children and young people in grades 2 or 3 achieving at least a minimum proficiency level in reading and mathematics, by sex.
  • AKI 3.5: Maternal Mortality
  • AKI 3.6: People using safely managed drinking water services (% of population)

Indicator 3.2: economic statistics

  • AKI 3.7: Access to electricity (% of population)
  • AKI 3.8: Unemployment, total (% of total labor force)
  • AKI 3.9: Manufacturing, value added (% of GDP)
  • AKI 3.10: Annualized average growth rate in per capita real survey mean consumption or income, bottom 40% of population (%)
  • AKI 3.14: Quarterly GDP

Indicator 3.3 environmental statistics

  • AKI 3.11: Level of water stress: freshwater withdrawal as a proportion of available freshwater resources
  • AKI 3.12: Renewable energy consumption (% of total final energy consumption)
  • AKI 3.13: Households and NPISHs Final consumption expenditure (current LCU)

Indicator 3.4: institutional statistics

  • AKI 3.15: Debt service (PPG and IMF only, % of exports of goods, services and primary income)

We will produce scatter plots between these indicators and the indicators pulled from UNSD.

#read in the SPI data with the AKI
aki_df <- read_csv(paste(raw_dir, 'SPI_data.csv', sep='/')) %>%
  select(country, iso3c, date, starts_with('SPI.D3')) %>%
  filter(date==2019)

title <- "Scatterplot of SPI Score for SDG Goal 1 Based on Average Score of All SDG Indicators Versus Score Based on AKI"

sdg <- 'SDG Goal 1'
aki <- 'SPI.D3.POV'

combined_df <- stats_df %>%
  filter(goal==!! sdg) %>%
  left_join(aki_df) %>%
  mutate(across({{aki}}, ~100*., .names="ind_quality_aki")) %>%
  select(country, iso3c, income, ind_quality, ind_quality_aki)

plot_ly(data=combined_df,
        x=~ind_quality, y=~ind_quality_aki,
        text=~country,
        color = ~income,
        type='scatter') %>%
  layout(title=str_wrap(paste('Scatterplot of SPI Score for', sdg, 'Based on Average Score of All SDG Indicators Versus Score Based on AKI'),70),
         xaxis=list(title='Average Score Across All SDG Indicators'),
         yaxis=list(title='Availability of Key Indicator Score'))
plot_fun <-  function(sdg, aki) {
  


combined_df <- stats_df %>%
  filter(goal==!! sdg) %>%
  left_join(aki_df) %>%
  mutate(across({{aki}}, ~100*., .names="ind_quality_aki")) %>%
  select(country, iso3c, income, ind_quality, ind_quality_aki)

p<- plot_ly(data=combined_df,
        x=~ind_quality, y=~ind_quality_aki,
        text=~country,
        color = ~income,
        type='scatter') %>%
  layout(title=str_wrap(paste('Scatterplot of SPI Score for', sdg, 'Based on Average Score of All SDG Indicators Versus Score Based on AKI'),70),
         xaxis=list(title='Average Score Across All SDG Indicators'),
         yaxis=list(title='Availability of Key Indicator Score'))

p


}

sum_fun <- function(sdg, aki) {
  


combined_df <- stats_df %>%
  filter(goal==!! sdg) %>%
  left_join(aki_df) %>%
  mutate(across({{aki}}, ~100*., .names="ind_quality_aki")) %>%
  select(country, iso3c, income, ind_quality, ind_quality_aki)
  
print(cor(combined_df$ind_quality, combined_df$ind_quality_aki, use='pairwise.complete.obs'))
print(summary(combined_df$ind_quality))
print(summary(combined_df$ind_quality_aki))
  
}

plot_fun("SDG Goal 1", 'SPI.D3.POV')
plot_fun("SDG Goal 2", 'SPI.D3.FIES')
plot_fun("SDG Goal 3", 'SPI.D3.CHLD.MORT')
plot_fun("SDG Goal 4", 'SPI.D3.SE.LPV.PRIM.BMP')
plot_fun("SDG Goal 5", 'SPI.D3.MMRT')
plot_fun("SDG Goal 6", 'SPI.D3.SH.H2O.SMDW.ZS')
plot_fun("SDG Goal 7", 'SPI.D3.ELEC')
plot_fun("SDG Goal 8", 'SPI.D3.SL.UEM.TOTL.NE.ZS')
plot_fun("SDG Goal 9", 'SPI.D3.NV.IND.MANF.ZS')
plot_fun("SDG Goal 10", 'SPI.D3.SI.SPR.PC40.ZG')
sum_fun("SDG Goal 1", 'SPI.D3.POV')
## [1] 0.6207392
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   25.00   50.00   53.29  100.00  100.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   15.00   33.86  100.00  100.00
sum_fun("SDG Goal 2", 'SPI.D3.FIES')
## [1] 0.303543
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   50.00   52.02  100.00  100.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   43.26  100.00  100.00
sum_fun("SDG Goal 3", 'SPI.D3.CHLD.MORT')
## [1] 0.5330519
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    85.7    87.5    83.1   100.0   100.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   15.00   40.37  100.00  100.00
sum_fun("SDG Goal 4", 'SPI.D3.SE.LPV.PRIM.BMP')
## [1] 0.4025074
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   42.90   57.10   52.32   66.70  100.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   10.33   30.00   60.00
sum_fun("SDG Goal 5", 'SPI.D3.MMRT')
## [1] 0.1387603
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   16.70   20.00   29.27   40.00  100.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   40.00   46.37  100.00  100.00
sum_fun("SDG Goal 6", 'SPI.D3.SH.H2O.SMDW.ZS')
## [1] 0.3793264
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      75      80      77     100     100 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00  100.00   50.23  100.00  100.00
sum_fun("SDG Goal 7", 'SPI.D3.ELEC')
## [1] 0.2753561
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00  100.00  100.00   94.73  100.00  100.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   41.12  100.00  100.00
sum_fun("SDG Goal 8", 'SPI.D3.SL.UEM.TOTL.NE.ZS')
## [1] 0.6415641
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   15.55   33.30   34.27   55.60   77.80 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   30.00  100.00   63.77  100.00  100.00
sum_fun("SDG Goal 9", 'SPI.D3.NV.IND.MANF.ZS')
## [1] 0.3651841
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   33.30   66.70   65.57  100.00  100.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00  100.00  100.00   86.05  100.00  100.00
sum_fun("SDG Goal 10", 'SPI.D3.SI.SPR.PC40.ZG')
## [1] 0.8963959
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   25.00   33.30   41.27   66.70   66.70 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   12.42   30.00   30.00